from openai import OpenAI
import os

# Added for steps reward enhancement.
def call_chat_api(prompt, *, client=None):
    """Send ``prompt`` to the ``deepseek-r1`` model and collect the streamed reply.

    The request is made in streaming mode through an OpenAI-compatible
    client. Each streamed chunk may carry a piece of the model's reasoning
    (``delta.reasoning_content``) and/or a piece of the final answer
    (``delta.content``); both are accumulated separately.

    Args:
        prompt: The user message text, sent as a single ``user`` message.
        client: Optional pre-built client exposing
            ``chat.completions.create(...)``. When omitted, a new ``OpenAI``
            client is created that targets the DashScope compatible-mode
            endpoint and reads its key from the ``DASHSCOPE_API_KEY``
            environment variable. Passing a client lets callers reuse one
            connection across many calls.

    Returns:
        A ``(thought, generated)`` tuple of strings: the concatenated
        reasoning text and the concatenated answer text. Either may be empty.

    Raises:
        Any exception raised by the client while creating or iterating the
        stream is propagated unchanged.
    """
    if client is None:
        client = OpenAI(
            # If the environment variable is not set, replace this with your
            # Alibaba Cloud Bailian API key: api_key="sk-xxx"
            api_key=os.getenv("DASHSCOPE_API_KEY", ""),
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )

    # The content must be the prompt string itself. Wrapping it in braces
    # (``{prompt}``) builds a one-element set, which cannot be serialized
    # into the JSON request body.
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model="deepseek-r1",
        messages=messages,
        max_tokens=8192,
        stream=True,
        timeout=600,
    )

    thought_parts = []
    answer_parts = []
    for chunk in completion:
        # Some stream chunks carry no choices at all; skip them instead of
        # failing on ``choices[0]``.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta

        # Reasoning text: collected whenever the field is present and not None.
        reasoning = getattr(delta, "reasoning_content", None)
        if reasoning is not None:
            thought_parts.append(reasoning)

        # Answer text: collected only when non-empty.
        content = getattr(delta, "content", None)
        if content:
            answer_parts.append(content)

    return "".join(thought_parts), "".join(answer_parts)

if __name__ == "__main__":
    # Manual smoke run: send one sample prompt; the returned pair is discarded.
    sample_prompt = "明天上海的天气怎么样？"
    call_chat_api(sample_prompt)